# Direct outputs not eligible for public release

# GLOBAL SETTINGS --------------------------------------------------------------

options(
    scipen = 999,
    digits = 16,
    max.print = .Machine$integer.max,
    show.error.locations = TRUE,
    warn = 1
)

RNGkind("L'Ecuyer-CMRG")
seed <- 818675309L
set.seed(seed) # setting main seed

# PACKAGES ---------------------------------------------------------------------
library(data.table)
library(Matrix)
library(zoo)
library(multiwayvcov)
library(lmtest)
library(checkmate)
library(futile.logger)
library(lfe)

library(remotes)
remotes::install_github("setzler/eventStudy/eventStudy")
library(eventStudy) # for DiD-IV

# PACKAGE SETTINGS -------------------------------------------------------------

# data.table
setDTthreads(threads = 1L)
options(datatable.print.class = TRUE, datatable.print.keys = TRUE)
# so that printing the data.table also shows the variable type on top

# BEGIN FILE -------------------------------------------------------------------

base::source("~/code/0-utility-functions/wald_es.R", local = TRUE)

# Read in lottery panel data
lottery_panel <- readRDS("~/population-panel-data/lottery_panel_data.rds")

lottery_panel[is.na(db_w2_wages), db_w2_wages := 0]
lottery_panel[is.na(spouse_w2_wages), spouse_w2_wages := 0]
lottery_panel[, primary_earner := as.integer(db_w2_wages >= spouse_w2_wages)]

lottery_panel[
    !is.na(age_sp) & !is.na(age),
    older_member := as.integer(age > age_sp)
]
lottery_panel[
    !is.na(age_sp) & !is.na(age),
    same_age := as.integer(age == age_sp)
]

lottery_panel[, age_case := as.logical(between(age, 21, 64, incbounds = TRUE))]
lottery_panel[, married := as.logical(married_ch == 1)]

ES_data <-
    ES_clean_data(
        long_data = copy(lottery_panel),
        outcomevar = "db_w2_wages",
        unit_var = "tin",
        cal_time_var = "tax_yr",
        onset_time_var = "win_yr",
        cluster_vars = "tin",
        omitted_event_time = -2,
        discrete_covars = c(
            "age",
            "age_sp",
            "db_w2_wages",
            "has_w2_wages",
            "spouse_w2_wages",
            "has_spouse_w2_wages",
            "primary_earner",
            "older_member",
            "same_age",
            "female"
        ),
        control_subset_var = "age_case",
        control_subset_event_time = 0,
        treated_subset_var = "age_case",
        treated_subset_event_time = 0,
        control_subset_var2 = "married",
        control_subset_event_time2 = -2,
        treated_subset_var2 = "married",
        treated_subset_event_time2 = -2,
        anticipation = 0,
        endog_var = "L_ann_multiperiod"
    )

# Now grab just the treated group in event time -2
# (as this is when we require them to be married)
ES_data_subset <-
    ES_data[treated == 1 & ref_event_time == -2 & catt_specific_sample == 1]
ES_data_subset[, catt_specific_sample := NULL]
ES_data <- NULL
rm(ES_data)

# Now need to copy each row to construct the winner vs spouse observations
ES_data_winners <-
    ES_data_subset[
        ,
        .(
            tin,
            tax_yr,
            female,
            has_w2_wages,
            db_w2_wages,
            primary_earner,
            older_member,
            same_age,
            age
        )
    ]

ES_data_spouses <-
    ES_data_subset[
        ,
        .(
            tin,
            tax_yr,
            female,
            has_spouse_w2_wages,
            spouse_w2_wages,
            primary_earner,
            older_member,
            same_age,
            age_sp
        )
    ]

ES_data_subset <- NULL
rm(ES_data_subset)

setnames(ES_data_spouses, "has_spouse_w2_wages", "has_w2_wages")
setnames(ES_data_spouses, "spouse_w2_wages", "db_w2_wages")
setnames(ES_data_spouses, "age_sp", "age")

ES_data_spouses[, female := abs(female - 1)]
ES_data_spouses[, primary_earner := abs(primary_earner - 1)]
ES_data_spouses[, older_member := abs(older_member - 1)]

ES_data_winners[, winner := 1L]
ES_data_spouses[, winner := 0L]

reg_dt <- rbindlist(list(ES_data_winners, ES_data_spouses), use.names = TRUE)
ES_data_winners <- NULL
ES_data_spouses <- NULL
rm(ES_data_winners, ES_data_spouses)

# Conduct a joint F-test of the randomization hypothesis
print(
    summary(
        felm(
            as.formula("winner ~  has_w2_wages + female + primary_earner + older_member + same_age + db_w2_wages + age | tin + tax_yr | 0 | tin"), # nolint
            data = copy(reg_dt)
        )
    )
)

# Now make balance table inputs
summary_table <-
    reg_dt[
        ,
        .(
            employment = mean(has_w2_wages),
            wages = mean(db_w2_wages),
            age = mean(age),
            female = mean(female),
            primary_earner = mean(primary_earner),
            older_member = mean(older_member),
            same_age = mean(same_age)
        ),
        by = .(winner)
    ]

saveRDS(
    summary_table,
    sprintf("~/estimation-output/winner_identity_regression_results.rds", outcome) # nolint
)

reg_dt <- NULL
summary_table <- NULL
rm(reg_dt, result_list)
